package org.yinxue.spider.core.spider;

import org.jsoup.Jsoup;
import org.yinxue.spider.core.downloader.Downloader;
import org.yinxue.spider.core.model.ATag;
import org.yinxue.spider.core.model.ImgTag;
import org.yinxue.spider.core.parser.Parser;
import org.yinxue.spider.core.util.SetLinkedList;
import org.yinxue.spider.core.util.StringUtils;
import org.yinxue.spider.core.util.UrlUtils;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Crawler facade: downloads pages and extracts the title, anchor tags and
 * image tags from them. <br>
 *
 * <p>Downloaded HTML is kept in an in-memory map keyed by URL, so each URL is
 * fetched at most once per {@code Spider} instance. The cache is a plain
 * {@link HashMap} that is never evicted by this class, so an instance is not
 * safe for concurrent use without external synchronization.
 *
 * @author zengjian
 * @create 2018-07-14 16:17
 * @since 1.0.0
 */
public class Spider {

    private final Downloader downloader = new Downloader();
    /** URL -> downloaded HTML; entries are never removed by this class. */
    private final Map<String, String> htmlCache = new HashMap<>();
    private final Parser parser = new Parser();
    /** Declared for tracking failed URLs; not read or written within this class. */
    private final SetLinkedList<String> failedUrls = new SetLinkedList<>();

    /**
     * Downloads the page at {@code url} and returns its document title.
     *
     * @param url page address
     * @return the title reported by jsoup, or an empty string when
     *         {@code url} is empty or no HTML could be obtained
     */
    public String parseTitle(String url) {
        // Same guard as parseATag/parseImgTag: do not hit the downloader without a URL.
        if (StringUtils.isEmpty(url)) {
            return "";
        }
        String html = downloadHtml(url);
        // Jsoup.parse rejects null input; treat a missing page as "no title".
        if (html == null) {
            return "";
        }
        return Jsoup.parse(html).title();
    }

    /**
     * Downloads the page at {@code url} and extracts its anchor tags.
     *
     * @param url page address
     * @return the tags produced by the parser for the page's HTML and the base
     *         URL derived from {@code url}; an empty list when {@code url} is empty
     */
    public List<ATag> parseATag(String url) {
        if (StringUtils.isEmpty(url)) {
            return Collections.emptyList();
        }
        String html = downloadHtml(url);
        String baseUrl = UrlUtils.parseBaseUrl(url);
        return parser.parseATag(html, baseUrl);
    }

    /**
     * Downloads the page at {@code url} and extracts its image tags.
     *
     * @param url page address
     * @return the tags produced by the parser for the page's HTML and the base
     *         URL derived from {@code url}; an empty list when {@code url} is empty
     */
    public List<ImgTag> parseImgTag(String url) {
        if (StringUtils.isEmpty(url)) {
            return Collections.emptyList();
        }
        String html = downloadHtml(url);
        String baseUrl = UrlUtils.parseBaseUrl(url);
        return parser.parseImgTag(html, baseUrl);
    }

    /**
     * Returns the HTML for {@code url}, downloading it on first request and
     * serving it from the cache afterwards.
     *
     * <p>Whatever the downloader returns is cached as-is, including
     * {@code null}, so a URL is not downloaded again by this instance.
     *
     * @param url page address
     * @return the cached or freshly downloaded HTML
     */
    public String downloadHtml(String url) {
        // containsKey (not a null check on get) so cached null results also short-circuit.
        if (htmlCache.containsKey(url)) {
            return htmlCache.get(url);
        }
        String html = downloader.downloadHtml(url);
        htmlCache.put(url, html);
        return html;
    }
}
